UMAP Visualizations
Contents
UMAP Visualizations¶
In this section, we visualize the models' encoding spaces in a compressed 2D/3D space to delineate their manifold structure.
1) Loading Data¶
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import deciphering_enigma

# Location of the experiment configuration file.
path_to_config = './config.yaml'

# Parse the experiment configuration.
exp_config = deciphering_enigma.load_yaml_config(path_to_config)
dataset_path = exp_config.dataset_path

# Register the experiment directory, collect the wav file paths, and keep
# only the loudness-normalised mic1 recordings.
audio_files = deciphering_enigma.build_experiment(exp_config)
audio_files = [f for f in audio_files if f.endswith('mic1_normloud.wav')]
print(f'Dataset has {len(audio_files)} samples')
Dataset has 44455 samples
# Parse per-file metadata from the file-name convention.
metadata_df, audio_format = deciphering_enigma.extract_metadata(exp_config, audio_files)
metadata_df = metadata_df.drop(columns=['xx', 'Label'])

# Load the audio files as torch tensors, ready for feature extraction.
audio_tensor_list = deciphering_enigma.load_dataset(
    audio_files,
    cfg=exp_config,
    speaker_ids=metadata_df['ID'],
    audio_format=audio_format,
)
Audio Tensors are already saved for vctk_umap_experiment
# Utterance durations in seconds (sample count / sample rate), used later
# to colour one of the UMAP plots.
import soundfile as sf
from tqdm import tqdm

dur = []
for wav_path in tqdm(audio_files):
    samples, sample_rate = sf.read(wav_path)
    dur.append(len(samples) / sample_rate)
100%|████████████████████████████████████| 44455/44455 [05:10<00:00, 143.28it/s]
2) Generating Embeddings¶
# Generate speech embeddings for every model listed in the experiment config.
extractor = deciphering_enigma.FeatureExtractor()
embeddings_dict = extractor.extract(audio_tensor_list, exp_config)
3) UMAP Dimensionality Reduction¶
import os
import numpy as np
import pandas as pd
import scipy
from scipy.spatial.distance import pdist
from umap import UMAP
from pacmap import PaCMAP
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from deciphering_enigma.settings import _hyperparams_grid_reducers, _optimize_function, _knn, _subsetsize
class ReducerTuner():
    """Tuner for dimensionality reduction methods.

    Implements grid-search across hyperparameters for each dimensionality
    reduction method preset in the settings script.
    NOTE: any method added in the settings script should follow the sklearn
    estimator interface (``fit_transform``).
    Tunes reduced dimensions by optimizing local and global structure metrics.
    Saves tuned results for each method as a pandas dataframe.
    """

    def __init__(self):
        # Grids and evaluation settings preset in deciphering_enigma.settings.
        self.reducer_params_grid = _hyperparams_grid_reducers
        self.optimize_func = _optimize_function
        self.knn = _knn
        self.subsetsize = _subsetsize

    def embedding_quality(self, X, Z, knn=10, subsetsize=1000):
        """Score how well the low-dimensional embedding Z preserves X.

        Args:
            X: high-dimensional data, shape (n_samples, n_features).
            Z: reduced embedding of the same samples.
            knn: number of nearest neighbours for the local metric.
            subsetsize: number of points used for the global metric.

        Returns:
            (mnn, rho): ``mnn`` is the mean fraction of shared k-nearest
            neighbours between X and Z (local structure, in [0, 1]);
            ``rho`` is the Spearman correlation between pairwise distances
            of a random subset of points (global structure).
        """
        ind_high = NearestNeighbors(n_neighbors=knn).fit(X).kneighbors(return_distance=False)
        ind_low = NearestNeighbors(n_neighbors=knn).fit(Z).kneighbors(return_distance=False)
        intersections = 0.0
        for i in range(X.shape[0]):
            intersections += len(set(ind_high[i]) & set(ind_low[i]))
        mnn = intersections / X.shape[0] / knn
        # Sample WITHOUT replacement: duplicated points produce zero
        # distances in both spaces, which artificially inflates rho.
        subset = np.random.choice(X.shape[0], size=min(subsetsize, X.shape[0]), replace=False)
        d_high = pdist(X[subset, :])
        d_low = pdist(Z[subset, :])
        rho = scipy.stats.spearmanr(d_high, d_low).correlation
        return (mnn, rho)

    def get_reducer(self, name):
        """Map a method name to its reducer class.

        Raises:
            AttributeError: if *name* is not one of the supported methods.
        """
        reducers = {'PCA': PCA, 'tSNE': TSNE, 'UMAP': UMAP, 'PaCMAP': PaCMAP}
        try:
            return reducers[name]
        except KeyError:
            raise AttributeError(f'This reducer {name} is not included...') from None

    def fit_eval(self, embeddings, reducer):
        """Standardize the embeddings, reduce them, and score the reduction.

        Returns:
            (reduced_embeddings, local_metric, global_metric).
        """
        stand_embeddings = StandardScaler().fit_transform(embeddings)
        reduced_embeddings = reducer.fit_transform(stand_embeddings)
        local_val, global_val = self.embedding_quality(
            stand_embeddings, reduced_embeddings, knn=self.knn, subsetsize=self.subsetsize)
        return reduced_embeddings, local_val, global_val

    def save_results_pandas(self, reducers_embeddings_dict, metadata=None, model_name=None, dataset_name=None):
        """Write the tuned 3-D embeddings of every method to a CSV.

        Builds a dataframe with a 3-level column index
        (Method / Optimized Metric / Dim), appends the metadata columns
        (lifted into the same 3-level header), and saves it under
        ``../{dataset_name}/{model_name}/dim_reduction_3d.csv``.
        """
        save_path = f'../{dataset_name}/{model_name}/dim_reduction_3d.csv'
        columns = pd.MultiIndex.from_product(
            [reducers_embeddings_dict.keys(), ['Local', 'Global'], ['Dim1', 'Dim2', 'Dim3']],
            names=["Method", "Optimized Metric", "Dim"])
        df = pd.DataFrame(data=[], columns=columns)
        for name, best_embeddings in reducers_embeddings_dict.items():
            for metric in ('Local', 'Global'):
                for dim_idx, dim in enumerate(('Dim1', 'Dim2', 'Dim3')):
                    df.loc[:, (name, metric, dim)] = best_embeddings[metric][:, dim_idx]
        if metadata is not None:
            temp_df = metadata.copy()
            # Lift the flat metadata columns into the 3-level header so they
            # can sit next to the embedding columns.
            temp_df.columns = pd.MultiIndex.from_tuples((c, '', '') for c in temp_df.columns)
            df = pd.concat([df, temp_df], axis=1)
        # Make sure the target directory exists before writing.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        df.to_csv(save_path)

    def tune_reducer(self, embeddings, metadata=None, dataset_name=None, model_name=None, save_results=True, save_path='./'):
        """Grid-search every preset reducer on *embeddings*.

        For each method, fits every hyperparameter combination, keeps the
        embeddings that maximize the local and the global metric
        respectively, and (optionally) saves them to disk. Skips all work
        if the results CSV already exists.

        Returns:
            dict mapping method name -> {'Local': best, 'Global': best}
            metric values (empty when results were already saved).
        """
        reducers_embeddings_dict = {}
        metrics_dict = {}
        df_path = f'../{dataset_name}/{model_name}/dim_reduction_3d.csv'
        if os.path.isfile(df_path):
            print(f'Tuned Reduced Embeddings already saved for {model_name} model!')
            return metrics_dict
        n_reducers = len(self.reducer_params_grid)
        for i, (reducer_name, reducer_params) in enumerate(self.reducer_params_grid.items()):
            print(f'Reducer {i+1}/{n_reducers}: {reducer_name}...')
            reducer_cls = self.get_reducer(reducer_name)
            all_embeddings, local_metrics, global_metrics = [], [], []
            for params in ParameterGrid(reducer_params):
                print(params)
                # Fixed seed for reproducibility across hyperparameter runs.
                reducer = reducer_cls(n_components=3, random_state=42, **params)
                reduced, local_metric, global_metric = self.fit_eval(embeddings, reducer)
                all_embeddings.append(reduced)
                local_metrics.append(local_metric)
                global_metrics.append(global_metric)
            metrics_dict[reducer_name] = {'Local': np.max(local_metrics),
                                          'Global': np.max(global_metrics)}
            reducers_embeddings_dict[reducer_name] = {
                'Local': all_embeddings[np.argmax(local_metrics)],
                'Global': all_embeddings[np.argmax(global_metrics)],
            }
        if save_results:
            self.save_results_pandas(reducers_embeddings_dict, metadata, model_name, dataset_name)
        return metrics_dict
# Tune the dimensionality reducers for every model's embeddings.
# (Iterate items() directly instead of an unused enumerate index.)
tuner = deciphering_enigma.ReducerTuner()
for model_name, model_embeddings in embeddings_dict.items():
    print(f'{model_name}:')
    tuner.tune_reducer(model_embeddings, metadata=metadata_df,
                       dataset_name=exp_config.dataset_name, model_name=model_name)
#2D interactive plot function using plotly
import plotly.express as px
import plotly.offline as py
from plotly.subplots import make_subplots
def visualize_embeddings(df, label_name, metrics=None, axis=None, acoustic_param=None,
                         opt_structure='Local', red_name='PCA', row=1, col=1,
                         hovertext='', label=''):
    """Add a 2-D scatter of a tuned reduction to a plotly subplot figure.

    Fix: the original used mutable default arguments (``metrics=[]``,
    ``axis=[]``, ``acoustic_param={}``), which are shared across calls;
    they are replaced with ``None`` sentinels.

    Args:
        df: dataframe with a (Method, Optimized Metric, Dim) column
            multi-index plus flat metadata columns (incl. 'AudioNames').
        label_name: metadata column used to colour the points.
        metrics, acoustic_param, hovertext, label: unused; kept for
            signature compatibility with existing callers.
        axis: the ``make_subplots`` figure the traces are added to.
        opt_structure: 'Local' or 'Global' — which optimized embedding to plot.
        red_name: reduction method name ('PCA', 'tSNE', 'UMAP', 'PaCMAP').
        row, col: subplot cell the traces are placed in.
    """
    traces = px.scatter(x=df[red_name, opt_structure, 'Dim1'],
                        y=df[red_name, opt_structure, 'Dim2'],
                        color=df[label_name], hover_name=df['AudioNames'])
    traces.layout.update(showlegend=False)
    axis.add_traces(
        list(traces.select_traces()),
        rows=row, cols=col
    )
# Settings shared by all plotting cells below: which optimization target
# ('Local' or 'Global') to display, and which metadata column to colour by.
optimize = 'Global'
label = 'ID'
1. Log-Mel-Spectrogram¶
# The seventeen plotting cells below were near-identical copies; they are
# consolidated into two small helpers plus per-model calls. Each call still
# produces and shows its own figure, in the original order.

# Header names pandas auto-generates for the unnamed metadata levels when the
# CSV is read back with a 3-level header; renaming them to '' restores the
# original flat metadata column names.
_UNNAMED_2D_LEVELS = {'Unnamed: 5_level_1': '', 'Unnamed: 5_level_2': '',
                      'Unnamed: 6_level_1': '', 'Unnamed: 6_level_2': ''}


def _load_reduction_df(model_name):
    """Read the tuned 2-D reduction CSV for *model_name* and clean its header."""
    df = pd.read_csv(f'../{exp_config.dataset_name}/{model_name}/dim_reduction.csv',
                     header=[0, 1, 2])
    df.rename(columns=_UNNAMED_2D_LEVELS, inplace=True)
    return df


def _show_umap(model_name, color_col=None, rows=1, cols=1,
               width=1000, height=800, prepare=None):
    """Show the UMAP projection of *model_name* coloured by *color_col*.

    Args:
        color_col: column to colour by; defaults to the global ``label``.
        rows, cols, width, height: subplot grid and figure size.
        prepare: optional callable run on the dataframe before plotting
            (used to derive extra colouring columns).
    """
    fig = make_subplots(rows=rows, cols=cols)
    df = _load_reduction_df(model_name)
    if prepare is not None:
        prepare(df)
    visualize_embeddings(df, color_col if color_col is not None else label,
                         metrics=[], axis=fig, opt_structure=optimize, red_name='UMAP')
    fig.update_layout(autosize=False, width=width, height=height, showlegend=False)
    fig.show()


for _name in ('Log-Mel-Spectrogram', 'BYOL-A_default', 'BYOL-I_default',
              'BYOL-S_default', 'BYOL-S_cvt', 'Hybrid_BYOL-S_cvt', 'APC', 'TERA'):
    _show_umap(_name)

# Layer-wise models use a larger 2x4 grid and a bigger canvas.
for _name in ('Wav2Vec2_latent', 'Wav2Vec2'):
    _show_umap(_name, rows=2, cols=4, width=4000, height=2000)

for _name in ('HuBERT_latent', 'HuBERT_best', 'HuBERT'):
    _show_umap(_name)


# HuBERT coloured by sentence identity, parsed from the file-name convention.
# NOTE(review): 'Scentence' reproduces the original (misspelled) column label.
def _add_sentence(df):
    df['Scentence'] = df['AudioNames'].apply(lambda x: x.split('_')[1])


_show_umap('HuBERT', color_col='Scentence', prepare=_add_sentence)


# HuBERT coloured by log utterance duration (computed earlier into `dur`).
def _add_duration(df):
    df['Duration'] = np.log(dur)
    df['Duration'] = df['Duration'].astype(float)


_show_umap('HuBERT', color_col='Duration', prepare=_add_duration)

_show_umap('Data2Vec_latent')
_show_umap('Data2Vec', rows=2, cols=4, width=4000, height=2000)
import plotly.express as px
import plotly.offline as py
from plotly.subplots import make_subplots
def visualize_3d_embeddings(df, label_name, metrics=None, axis=None, acoustic_param=None,
                            opt_structure='Local', red_name='PCA', row=1, col=1,
                            hovertext='', label=''):
    """Build a 3-D scatter of a tuned reduction and return the figure.

    Fixes: the original built the figure and discarded it (the function had
    no observable effect), and used mutable default arguments (``metrics=[]``,
    ``axis=[]``, ``acoustic_param={}``); the figure is now returned and, when
    *axis* is given, its traces are attached to that subplot grid — mirroring
    ``visualize_embeddings``.

    Args:
        df: dataframe with a (Method, Optimized Metric, Dim) column
            multi-index plus flat metadata columns (incl. 'AudioNames').
        label_name: metadata column used to colour the points.
        metrics, acoustic_param, hovertext, label: unused; kept for
            signature compatibility.
        axis: optional ``make_subplots`` figure to add the traces to.
        opt_structure: 'Local' or 'Global' — which optimized embedding to plot.
        red_name: reduction method name.
        row, col: subplot cell used when *axis* is given.

    Returns:
        The plotly 3-D scatter figure.
    """
    traces = px.scatter_3d(x=df[red_name, opt_structure, 'Dim1'],
                           y=df[red_name, opt_structure, 'Dim2'],
                           z=df[red_name, opt_structure, 'Dim3'],
                           color=df[label_name], hover_name=df['AudioNames'])
    traces.layout.update(showlegend=False)
    if axis is not None:
        axis.add_traces(list(traces.select_traces()), rows=row, cols=col)
    return traces
# The two 3-D plotting cells were copies of each other; consolidated into a
# single loop. Each model still gets its own figure, shown in the same order.

# Auto-generated header names for the unnamed metadata levels of the 3-D CSV.
_UNNAMED_3D_LEVELS = {'Unnamed: 7_level_1': '', 'Unnamed: 7_level_2': '',
                      'Unnamed: 8_level_1': '', 'Unnamed: 8_level_2': ''}

for model_name in ('HuBERT_best', 'TERA'):
    df = pd.read_csv(f'../{exp_config.dataset_name}/{model_name}/dim_reduction_3d.csv',
                     header=[0, 1, 2])
    df.rename(columns=_UNNAMED_3D_LEVELS, inplace=True)
    # Global-structure-optimized 3-D UMAP, coloured by speaker ID.
    fig = px.scatter_3d(x=df['UMAP', 'Global', 'Dim1'],
                        y=df['UMAP', 'Global', 'Dim2'],
                        z=df['UMAP', 'Global', 'Dim3'],
                        color=df[label], hover_name=df['AudioNames'])
    fig.update_layout(autosize=False, width=1000, height=1000, showlegend=False)
    fig.show()